/*==============================================================================
Replication package for the BPEA Spring 2024 paper:

"The Emergence of a Uniform Business Cycle in the United States: 
Evidence from New Claims-Based Unemployment Data"

Andrew Fieldhouse, David Munro, Christoffer Koch, and Sean Howard

Stata code creating the panel dataset (LPIV_DATA.dta) used in the Rep_Stata_LPIV.do replication package for Figure 5
==============================================================================*/

clear all

* Set the replication package root ONCE here; every other path is derived from it.
* Keep the trailing slash: the sub-folder names below are appended directly to this value.

local dirroot "/Users/afieldhouse/Dropbox/Research/Historical Claims Project/continuous_updates/Replication Package_BPEA_CBUR/"

* Working directory: the Rep_Code sub-folder of the package
cd "`dirroot'Rep_Code/"

* dirdata: folder with LPIV_Data_Import.xlsx, the Bartik .dta file, and all .dta files saved by this script
* dircbur: folder with the CBUR Data.xlsx workbook
local dirdata "`dirroot'Rep_Data/"
local dircbur "`dirroot'CBUR Data/"

*** State-level CBUR: read the monthly workbook and average to calendar years (CY) ***

clear
import excel "`dircbur'CBUR Data.xlsx", firstrow

* Calendar year of each observation, taken from the Date column
generate year = yofd(Date)
label variable year "year"

* Annual mean of UR_Claims_3MA for each state, stored directly under the name CBUR
collapse (mean) CBUR = UR_Claims_3MA, by(State year)

* Rescale by 1/100 (presumably percent to proportion; confirm against the workbook)
replace CBUR = CBUR/100
label variable CBUR "Claims-Based Unemployment Rate"

save "`dirdata'/CBUR_CY.dta", replace

*** U.S. CBUR: read the monthly workbook and average to calendar years (CY) ***

clear
import excel "`dircbur'CBUR Data.xlsx", firstrow

* Calendar year of each observation, taken from the Date column
generate year = yofd(Date)
label variable year "year"

* Annual mean of UR_Claims_US3MA, kept at one row per State and year so that
* the file can later be merged 1:1 on year and State with the state-level data
collapse (mean) CBUR_nat = UR_Claims_US3MA, by(State year)

* Rescale by 1/100, same as the state-level series
replace CBUR_nat = CBUR_nat/100
label variable CBUR_nat "U.S. Claims-Based Unemployment Rate"

save "`dirdata'/US_CBUR_CY.dta", replace


*** State-level nonfarm payroll employment: read the monthly workbook and average to calendar years (CY) ***

clear
import excel "`dircbur'CBUR Data.xlsx", firstrow

* Calendar year of each observation, taken from the Date column
generate year = yofd(Date)
label variable year "year"

* Annual mean of nonfarm_NSA for each state, stored directly under the name NFE
collapse (mean) NFE = nonfarm_NSA, by(State year)
label variable NFE "Nonfarm Payroll Employment"

* Natural log of the annual average
generate lnNFE = ln(NFE)
label variable lnNFE "Log Nonfarm Payroll Employment"

save "`dirdata'/NFE_CY.dta", replace


*** State-level population: read the quarterly sheet and average to calendar years (CY) ***

clear
import excel "`dirdata'LPIV_Data_Import.xlsx", sheet("Population_Q") firstrow

* Within-year means by state; the sheet must already contain State and year columns.
* FIPS is averaged too so that the state code is carried through the collapse.
collapse (mean) FIPS POP, by(State year)

* Natural log of the annual average population
generate lnPOP = ln(POP)
label variable lnPOP "Log Population"

save "`dirdata'/POP_CY.dta", replace


*** U.S. population and nonfarm payroll employment: read the annual sheet ***
* 	Note: We use aggregate U.S. data in constructing relative population and employment because of missing state-level observations for AK, HI, MI, and MN in the 1940s-50s

clear
import excel "`dirdata'LPIV_Data_Import.xlsx", sheet("US_Pop_Emp_Annual") firstrow

* Natural logs of the national series
generate lnNFE_nat = ln(US_NFE)
label variable lnNFE_nat "Log U.S. Nonfarm Payroll Employment"

generate lnPOP_nat = ln(US_POP)
label variable lnPOP_nat "Log U.S. Population"

save "`dirdata'/US_NFE_POP.dta", replace

*** Merge annual frequency datasets ***

clear
use "`dirdata'CBUR_CY.dta"

* Attach the remaining state-by-year files in turn; nogenerate means no _merge indicator is left behind
foreach annualfile in US_CBUR_CY NFE_CY POP_CY {
	merge 1:1 year State using "`dirdata'/`annualfile'.dta", nogenerate
}

* Trim early years before bringing in the U.S. aggregates
drop if year < 1946

merge 1:1 year State using "`dirdata'/US_NFE_POP.dta", nogenerate

* Trim late years after the U.S. aggregates are attached
drop if year > 2022

* The Bartik file is keyed on lower-case fips
rename FIPS fips

*merge 1:1 year fips using "`dirdata'/Bartik_Full.dta"
merge 1:1 year fips using "`dirdata'/Bartik_Baseline.dta", nogenerate

*** Relative labor market variables: state value minus the U.S. value ***

* Log employment gap
generate relnNFE = lnNFE - lnNFE_nat
label variable relnNFE "Relative Log Nonfarm Payroll Employment"

* Claims-based unemployment rate gap
generate relCBUR = CBUR - CBUR_nat
label variable relCBUR "Relative Claims-Based Unemployment Rate"

* Log population gap
generate relnPOP = lnPOP - lnPOP_nat
label variable relnPOP "Relative Log Population"
	
	
*** Generate lagged control variables ***

* Declare the fips-by-year panel so the lag operators below are taken within each fips
tsset fips year

* For lag k = 1, 2 each control is the one-year change ending k years back: x[t-k] - x[t-k-1].
* Labels are attached using the full variable names. The unsuffixed stubs (reldNFE, reldCBUR, reldPOP)
* are not variables themselves, and as abbreviations they are no longer unique once both the
* _L1 and _L2 versions exist, so labelling through the stub is not reliable.
* The lag is appended to each label so the _L1 and _L2 controls can be told apart.
forvalues k = 1/2 {

	local k1 = `k' + 1

	gen reldNFE_L`k' = l`k'.relnNFE - l`k1'.relnNFE
		la var reldNFE_L`k' "Change in relative (log) employment growth (lag `k')"

	gen reldCBUR_L`k' = l`k'.relCBUR - l`k1'.relCBUR
		la var reldCBUR_L`k' "Change in relative claims-based unemployment rate (lag `k')"

	gen reldPOP_L`k' = l`k'.relnPOP - l`k1'.relnPOP
		la var reldPOP_L`k' "Change in relative (log) population growth (lag `k')"

}

	
*** Generate LHS cumulated variables before any sample splits ***

* For each horizon 0 through 11: value at year t+horizon minus value at year t-1
forvalues horizon = 0/11 {

	* Relative employment
	generate cNFE`horizon' = F`horizon'.relnNFE - L1.relnNFE

	* Relative population
	generate cPOP`horizon' = F`horizon'.relnPOP - L1.relnPOP

	* Relative unemployment
	generate cUR`horizon' = F`horizon'.relCBUR - L1.relCBUR

}

* Final panel used by Rep_Stata_LPIV.do
save "`dirdata'LPIV_DATA.dta", replace
